Load required libraries

library(STutility)
## Loading required package: Seurat
## Attaching SeuratObject
## Loading required package: ggplot2
## Registered S3 method overwritten by 'imager':
##   method      from
##   plot.imlist
library(ggplot2)
library(ggpubr)
library(ggrastr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(magrittr)
library(ggbreak)
## ggbreak v0.0.9
## 
## If you use ggbreak in published research, please cite the following
## paper:
## 
## S Xu, M Chen, T Feng, L Zhan, L Zhou, G Yu. Use ggbreak to effectively
## utilize plotting space to deal with large datasets and outliers.
## Frontiers in Genetics. 2021, 12:774846. doi: 10.3389/fgene.2021.774846

Assemble spaceranger output files

TODO: make datasets available for download

# Mouse brain
samples1 <- Sys.glob(paths = "../../spaceranger_output/degraded_MB/*/filtered_feature_bc_matrix.h5")
imgs1 <- Sys.glob(paths = "../../spaceranger_output/degraded_MB/*/spatial/tissue_hires_image.png")
spotfiles1 <- Sys.glob(paths = "../../spaceranger_output/degraded_MB/*/spatial/tissue_positions_list.csv")
json1 <- Sys.glob(paths = "../../spaceranger_output/degraded_MB/*/spatial/scalefactors_json.json")

# Prostate cancer
samples2 <- Sys.glob(paths = "../../spaceranger_output/prostatecancer/*/filtered_feature_bc_matrix.h5")
imgs2 <- Sys.glob(paths = "../../spaceranger_output/prostatecancer/*/spatial/tissue_hires_image.png")
spotfiles2 <- Sys.glob(paths = "../../spaceranger_output/prostatecancer/*/spatial/tissue_positions_list.csv")
json2 <- Sys.glob(paths = "../../spaceranger_output/prostatecancer/*/spatial/scalefactors_json.json")

infoTable <- data.frame(samples = c(samples1, samples2), imgs = c(imgs1, imgs2), spotfiles = c(spotfiles1, spotfiles2), json = c(json1, json2))
infoTable <- cbind(infoTable, arrayID = do.call(rbind, strsplit(infoTable$samples, "/"))[, 5])

curated_metadata <- openxlsx::read.xlsx("../../sheets/curated_RNA_rescue_sample_metadata.xlsx", sheet = 3)
curated_metadata <- setNames(curated_metadata, nm = c("storage_time", "seq_date", "include", "RNA_rescue",
                                                       "project", "experimenter", "processer", "comments",
                                                       "ID", "paper_id", "RIN", "DV200", "protocol", "source", "arrayID", "spots_under_tissue",
                                                       "genes_detected", "fraction_spots_under_tissue",
                                                       "median_genes_per_spot", "median_UMIs_per_spot", 
                                                       "saturation", "reads_mapped_to_probe_set",
                                                       "reads_mapped_confidently_to_probe_set",
                                                       "reads_mapped_confidently_to_filtered_probe_set",
                                                       "reads_mapped_to_genome",
                                                       "reads_mapped_confidently_to_genome",
                                                       "number_of_panel_genes"))

infoTable <- merge(infoTable, curated_metadata, by = "arrayID")

# Exclude FFPE data
infoTable <- subset(infoTable, !protocol %in% "FFPE")

Load mouse brain Visium data (4xRRST + 4xstandard)

# Subset infoTable to include mosuebrain data
MB_infoTable <- subset(infoTable, source == "mouse brain")
MB <- InputFromTable(infotable = MB_infoTable)
## Using spotfiles to remove spots outside of tissue
## Loading ../../spaceranger_output/degraded_MB/V10S29-134_A1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../../spaceranger_output/degraded_MB/V10S29-134_B1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../../spaceranger_output/degraded_MB/V10S29-134_C1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../../spaceranger_output/degraded_MB/V10S29-134_D1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../../spaceranger_output/degraded_MB/V11D08-304_A1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../../spaceranger_output/degraded_MB/V11D08-304_B1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../../spaceranger_output/degraded_MB/V11D08-304_C1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../../spaceranger_output/degraded_MB/V11D08-304_D1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## 
## ------------- Filtering (not including images based filtering) -------------- 
##   Spots removed:  0  
##   Genes removed:  9023  
## Saving capture area ranges to Staffli object 
## After filtering the dimensions of the experiment is: [23262 genes, 34597 spots]

Load H&E images into the MB object

MB <- LoadImages(MB, time.resolve = FALSE)
## Loading images for 8 samples: 
##   Reading ../../spaceranger_output/degraded_MB/V10S29-134_A1/spatial/tissue_hires_image.png for sample 1 ... 
##   Scaling down sample 1 image from 1979x2000 pixels to 400x404 pixels 
##   Reading ../../spaceranger_output/degraded_MB/V10S29-134_B1/spatial/tissue_hires_image.png for sample 1 ... 
##   Scaling down sample 2 image from 1979x2000 pixels to 400x404 pixels 
##   Reading ../../spaceranger_output/degraded_MB/V10S29-134_C1/spatial/tissue_hires_image.png for sample 1 ... 
##   Scaling down sample 3 image from 1979x2000 pixels to 400x404 pixels 
##   Reading ../../spaceranger_output/degraded_MB/V10S29-134_D1/spatial/tissue_hires_image.png for sample 1 ... 
##   Scaling down sample 4 image from 1979x2000 pixels to 400x404 pixels 
##   Reading ../../spaceranger_output/degraded_MB/V11D08-304_A1/spatial/tissue_hires_image.png for sample 1 ... 
##   Scaling down sample 5 image from 2000x1937 pixels to 400x387 pixels 
##   Reading ../../spaceranger_output/degraded_MB/V11D08-304_B1/spatial/tissue_hires_image.png for sample 1 ... 
##   Scaling down sample 6 image from 2000x1874 pixels to 400x375 pixels 
##   Reading ../../spaceranger_output/degraded_MB/V11D08-304_C1/spatial/tissue_hires_image.png for sample 1 ... 
##   Scaling down sample 7 image from 2000x1937 pixels to 400x387 pixels 
##   Reading ../../spaceranger_output/degraded_MB/V11D08-304_D1/spatial/tissue_hires_image.png for sample 1 ... 
##   Scaling down sample 8 image from 2000x1956 pixels to 400x391 pixels
ImagePlot(MB, ncols = 4)

Here we apply some rotations to make a rough alignment of the H&E images.

# Warp transform
MB <- WarpImages(MB, verbose = TRUE, transforms = list("1" = list(angle = 90), "2" = list(angle = -90), 
                                                       "3" = list(angle = 90), "4" = list(angle = 90),
                                                       "5" = list(angle = -90), "6" = list(angle = -90), 
                                                       "7" = list(angle = 90), "8" = list(angle = -90)))
## Creating dummy masks ...Loading masked image for sample 1 ... 
## Warping pixel coordinates for 1 ... 
## Warping image for 1 ... 
## Warping image mask for 1 ... 
## Finished alignment for sample1 
## 
## Loading masked image for sample 2 ... 
## Warping pixel coordinates for 2 ... 
## Warping image for 2 ... 
## Warping image mask for 2 ... 
## Finished alignment for sample2 
## 
## Loading masked image for sample 3 ... 
## Warping pixel coordinates for 3 ... 
## Warping image for 3 ... 
## Warping image mask for 3 ... 
## Finished alignment for sample3 
## 
## Loading masked image for sample 4 ... 
## Warping pixel coordinates for 4 ... 
## Warping image for 4 ... 
## Warping image mask for 4 ... 
## Finished alignment for sample4 
## 
## Loading masked image for sample 5 ... 
## Warping pixel coordinates for 5 ... 
## Warping image for 5 ... 
## Warping image mask for 5 ... 
## Finished alignment for sample5 
## 
## Loading masked image for sample 6 ... 
## Warping pixel coordinates for 6 ... 
## Warping image for 6 ... 
## Warping image mask for 6 ... 
## Finished alignment for sample6 
## 
## Loading masked image for sample 7 ... 
## Warping pixel coordinates for 7 ... 
## Warping image for 7 ... 
## Warping image mask for 7 ... 
## Finished alignment for sample7 
## 
## Loading masked image for sample 8 ... 
## Warping pixel coordinates for 8 ... 
## Warping image for 8 ... 
## Warping image mask for 8 ... 
## Finished alignment for sample8

Figure 1


b - Spatial map of unique genes per spot [mousebrain]


MB$protocol <- MB[[]] %>%
  mutate(protocol = ifelse(protocol == "RNA rescue", "RRST", protocol)) %>%
  pull(protocol)

p <- ST.FeaturePlot(MB, features = "nFeature_RNA", ncol = 2, indices = c(1, 5), show.sb = FALSE,
                    label.by = "protocol",  pt.size = 1.2) &
  theme(plot.title = element_blank(), legend.position = "top", 
        legend.text = element_text(angle = 60, hjust = 1, size = 14, colour = "black"),
        legend.title = element_text(size = 18, face = "bold", colour = "black", vjust = 1),
        strip.text = element_text(size = 18, face = "bold", colour = "black")) &
  labs(fill = "unique genes")
## Loading required namespace: viridis
p

jpeg(filename = "plots/mousebrain_unique_genes_spatial.jpg", width = 2200, height = 1400, res = 300)
print(p)
dev.off()

c - Violin plots of unique genes per spot [mousebrain]


p <- ggplot(MB[[]], aes(arrayID, nFeature_RNA, fill = protocol)) +
  geom_violin(scale = "width") +
  geom_boxplot(width = 0.3) +
  geom_hline(data = MB[[]] %>% dplyr::group_by(protocol) %>% summarize(Median = median(nFeature_RNA)), 
             aes("", yintercept = Median, group = protocol), linetype = "longdash") +
  geom_label(data = MB[[]] %>% dplyr::group_by(protocol) %>% summarize(Median = median(nFeature_RNA)), size = 5,
             aes("", y = Median, group = protocol, label = Median)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, size = 14, colour = "black"), 
        axis.text.y = element_text(size = 14, colour = "black"), 
        strip.text = element_text(size = 18, face = "bold", colour = "black")) +
  labs(y = "", x = "") +
  facet_grid(~protocol, scales = "free") +
  guides(fill = "none") +
  scale_x_discrete(labels = c("", "Rep1", "Rep2", "Rep3", "Rep4"))
## Warning: Ignoring unknown aesthetics: x
p

# Export plot
jpeg(filename = "plots/mousebrain_unique_genes_violin.jpg", width = 2000, height = 900, res = 300)
print(p)
dev.off()

Calculate gene attributes for mouse brain data

gene_attr <- do.call(cbind, lapply(c("RRST", "standard"), function(pro) {
  umis <- GetAssayData(MB, slot = "counts", assay = "RNA")[, MB$protocol %in% pro]
  gene_attr_protocol <- data.frame(rowSums(umis), rowMeans(umis > 0), row.names = rownames(umis))
  gene_attr_protocol <- setNames(gene_attr_protocol, nm = c(paste0(gsub(pattern = " ", replacement = "_", x = pro), "_count"), 
                                                            paste0(gsub(pattern = " ", replacement = "_", x = pro), "_det_rate")))
  return(gene_attr_protocol)
}))

gene_attr <- gene_attr %>%
  mutate("RRST_log_count" = log1p(`RRST_count`), "standard_log_count" = log1p(`standard_count`))

Subset genes to include those available in the Visium FFPE probe set.

mouse_probes <- read.csv("sheets/Visium_Mouse_Transcriptome_Probe_Set_v1.0_mm10-2020-A.csv", skip = 5, header = TRUE)
mouse_probes$gene_name <- do.call(rbind, strsplit(mouse_probes$probe_id, "\\|"))[, 2]

d - Gene-gene scatter plot of detection rate and UMI counts [mouse brain]


p1 <- ggplot(gene_attr %>% mutate(gene = rownames(.)) %>% 
               subset(gene %in% mouse_probes$gene_name), 
             aes_string("standard_log_count", "RRST_log_count")) +
  geom_point(size = 0.3) +
  ggpubr::stat_cor(digits = 2) +
  scale_x_continuous(limits = c(0, max(max(gene_attr$`RRST_log_count`), max(gene_attr$standard_log_count)))) +
  scale_y_continuous(limits = c(0, max(max(gene_attr$`RRST_log_count`), max(gene_attr$standard_log_count)))) +
  geom_abline(slope = 1, intercept = 0, color = "red", linetype = "longdash") +
  labs(x = "standard protocol", y = "RRST", title = "log1p(UMI counts)") +
  theme_minimal() +
  theme(axis.text = element_text(size = 11, colour = "black"), axis.title = element_text(size = 14, colour = "black"),
        plot.title = element_text(size = 14, face = "bold", colour = "black"))

p2 <- ggplot(gene_attr, aes_string("standard_det_rate", "RRST_det_rate")) +
  geom_point(size = 0.3) +
  geom_abline(slope = 1, intercept = 0, color = "red", linetype = "longdash") +
  labs(x = "standard protocol", y = "RRST", title = "detection rate") +
  theme_minimal() +
  theme(axis.text = element_text(size = 11, colour = "black"), axis.title = element_text(size = 14, colour = "black"),
        plot.title = element_text(size = 14, face = "bold", colour = "black"))

p1 + p2

# Export plots
jpeg(filename = "plots/mousebrain_gene_scatter_counts.jpg", width = 1000, height = 1000, res = 300)
print(p1)
dev.off()
jpeg(filename = "plots/mousebrain_gene_scatter_det_rater.jpg", width = 1000, height = 1000, res = 300)
print(p2)
dev.off()

Supplementary figure - biotype content mouse brain


gene_annotations <- read.table(file = "sheets/mgenes.tsv", header = TRUE)
rownames(gene_annotations) <- gene_annotations$gene_name

# rename gene type for mitochondrial and ribosomal protein coding genes
rp.genes <- grep(pattern = "^Rpl|^Rps", x = rownames(gene_attr), value = TRUE)
mt.genes <- grep(pattern = "^mt-", x = rownames(gene_attr), value = TRUE)
gene_annotations$gene_type <- ifelse(gene_annotations$gene_name %in% rp.genes, "ribosomal\nprotein_coding", gene_annotations$gene_type)
gene_annotations$gene_type <- ifelse(gene_annotations$gene_name %in% mt.genes, "mitochondrial", gene_annotations$gene_type)

gene_attr$gene_type <- gene_annotations[rownames(gene_attr), "gene_type"]
gene_attr <- gene_attr %>%
  mutate(observed_in = case_when(RRST_count > 0 & standard_count > 0 ~ "both",
                                 RRST_count > 0 & standard_count == 0 ~ "RRST",
                                 RRST_count == 0 & standard_count > 0 ~ "standard")) %>%
  mutate(gene = rownames(.))

lvls <- gene_attr %>% 
           group_by(gene_type) %>% 
           summarize(tot = sum(RRST_count + standard_count)) %>% 
           arrange(-tot) %>% 
           pull(gene_type)

gene_attr_summarized <- gene_attr %>% 
               reshape2::melt(measure.vars = c("RRST_count", "standard_count")) %>%
               filter(value > 0) %>%
               group_by(gene_type, variable) %>% 
               summarize(totTranscript = n(), totCount = sum(value)) %>%
               mutate(gene_type = factor(gene_type, levels = lvls), 
                      variable = ifelse(variable == "RRST_count", "RRST", "standard"))
## `summarise()` has grouped output by 'gene_type'. You can override using the
## `.groups` argument.
gene_attr_prop <- gene_attr %>% 
  group_by(gene_type, observed_in) %>%
  summarize(Freq = n()) %>%
  group_by(gene_type) %>%
  mutate(prop = Freq/sum(Freq)) %>%
  mutate(gene_type = factor(gene_type, levels = lvls))
## `summarise()` has grouped output by 'gene_type'. You can override using the
## `.groups` argument.
p1 <- ggplot(gene_attr_summarized,
             aes(gene_type, totTranscript, fill = variable)) +
  geom_bar(stat = "identity", position = position_dodge(preserve = 'single')) +
  scale_y_cut(breaks = 100, which = c(1, 2), scales = c(1, 2), space = 0.5) +
  theme_minimal() +
  theme(axis.text.x = element_blank(), axis.title.x = element_blank(), 
        axis.text.y = element_text(size = 12, colour = "black"), 
        axis.title = element_text(size = 14, colour = "black"), 
        legend.text = element_text(size = 14, colour = "black"),
        legend.title = element_text(size = 14, colour = "black")) +
  labs(y = "transcripts detected", fill = "")
p2 <- ggplot(gene_attr_prop, aes("", prop, fill = observed_in)) +
  geom_bar(stat = "identity", color = "black") + 
  coord_polar("y", start = 0) +
  scale_fill_brewer(palette = "Spectral") +
  facet_wrap(~gene_type, strip.position = "bottom", ncol = 10) +
  theme_void() +
  theme(strip.text = element_text(angle = 90, vjust = 0.5, hjust = 1, margin = margin(0.2, 0, 0.2, 0, "cm"), colour = "black", size = 12), 
        legend.text = element_text(size = 14, colour = "black"),
        legend.title = element_text(size = 14, colour = "black"))
p3 <- ggplot(gene_attr_summarized,
               aes(gene_type, totCount, fill = variable)) +
  geom_bar(stat = "identity", position = position_dodge(preserve = 'single')) +
  scale_y_cut(breaks = 1e5, which = c(1, 2), scales = c(1, 2), space = 0.5) +
  theme_minimal() +
  theme(axis.text.x = element_blank(), axis.title.x = element_blank(), 
        axis.text.y = element_text(size = 12, colour = "black"), 
        axis.title = element_text(size = 14, colour = "black"), 
        legend.text = element_text(size = 14, colour = "black"),
        legend.title = element_text(size = 14, colour = "black")) +
  labs(y = "UMI count", fill = "")

p1

p2

p3

jpeg("../Suppl_figures/Suppl_Figure_mousebrain_and_prostatecancer/mousebrain_biotype_content1.jpg", width = 2500, height = 500, res = 200, bg = NA)
print(p1)
dev.off()
jpeg("../Suppl_figures/Suppl_Figure_mousebrain_and_prostatecancer/mousebrain_biotype_content2.jpg", width = 2500, height = 500, res = 200, bg = NA)
print(p2)
dev.off()
jpeg("../Suppl_figures/Suppl_Figure_mousebrain_and_prostatecancer/mousebrain_biotype_content3.jpg", width = 2500, height = 500, res = 200, bg = NA)
print(p3)
dev.off()

Load prostate cancer data (2xRRST and 2xstandard)

PC_infoTable <- subset(infoTable, source == "prostate cancer")
PC <- InputFromTable(infotable = PC_infoTable[c(3:4, 1:2), ])
## Using spotfiles to remove spots outside of tissue
## Loading ../../spaceranger_output/prostatecancer/V11M22-349_C1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../../spaceranger_output/prostatecancer/V11M22-349_D1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../../spaceranger_output/prostatecancer/V11A20-405_C1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../../spaceranger_output/prostatecancer/V11A20-405_D1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## 
## ------------- Filtering (not including images based filtering) -------------- 
##   Spots removed:  0  
##   Genes removed:  11352  
## Saving capture area ranges to Staffli object 
## After filtering the dimensions of the experiment is: [25249 genes, 15684 spots]

Prostate cancer H&E images

PC <- LoadImages(PC, time.resolve = FALSE)
## Loading images for 4 samples: 
##   Reading ../../spaceranger_output/prostatecancer/V11M22-349_C1/spatial/tissue_hires_image.png for sample 1 ... 
##   Scaling down sample 1 image from 2000x1937 pixels to 400x387 pixels 
##   Reading ../../spaceranger_output/prostatecancer/V11M22-349_D1/spatial/tissue_hires_image.png for sample 1 ... 
##   Scaling down sample 2 image from 2000x1937 pixels to 400x387 pixels 
##   Reading ../../spaceranger_output/prostatecancer/V11A20-405_C1/spatial/tissue_hires_image.png for sample 1 ... 
##   Scaling down sample 3 image from 1979x2000 pixels to 400x404 pixels 
##   Reading ../../spaceranger_output/prostatecancer/V11A20-405_D1/spatial/tissue_hires_image.png for sample 1 ... 
##   Scaling down sample 4 image from 1979x2000 pixels to 400x404 pixels
ImagePlot(PC, ncols = 4)

Apply rigid transformations.

# Warp transform
PC <- WarpImages(PC, verbose = TRUE, transforms = list("1" = list(shift.y = -130), "3" = list(angle = 180)))
## Creating dummy masks ...Loading masked image for sample 1 ... 
## Warping pixel coordinates for 1 ... 
## Warping image for 1 ... 
## Warping image mask for 1 ... 
## Finished alignment for sample1 
## 
## Loading masked image for sample 3 ... 
## Warping pixel coordinates for 3 ... 
## Warping image for 3 ... 
## Warping image mask for 3 ... 
## Finished alignment for sample3

b - Spatial map of unique genes per spot [prostate cancer]


PC$protocol <- PC[[]] %>%
  mutate(protocol = ifelse(protocol == "RNA rescue", "RRST", protocol)) %>%
  pull(protocol)


p <- ST.FeaturePlot(PC, features = "nFeature_RNA", ncol = 2, indices = c(3, 2), 
                    label.by = "protocol",  pt.size = 1.2, show.sb = FALSE) &
  theme(plot.title = element_blank(), legend.position = "top", 
        legend.text = element_text(angle = 60, hjust = 1, size = 14, colour = "black"),
        legend.title = element_text(size = 18, face = "bold", colour = "black", vjust = 1),
        strip.text = element_text(size = 18, face = "bold", colour = "black")) &
  labs(fill = "unique genes")

p

# Export plot
jpeg(filename = "plots/prostatecancer_unique_genes_spatial.jpg", width = 2200, height = 1400, res = 300)
print(p)
dev.off()

c - Violin plots of unique genes per spot [mousebrain]


p <- ggplot(PC[[]], aes(arrayID, nFeature_RNA, fill = protocol)) +
  geom_violin(scale = "width") +
  geom_boxplot(width = 0.3) +
  geom_hline(data = PC[[]] %>% dplyr::group_by(protocol) %>% summarize(Median = median(nFeature_RNA)), 
             aes("", yintercept = Median, group = protocol), linetype = "longdash") +
  geom_label(data = PC[[]] %>% dplyr::group_by(protocol) %>% summarize(Median = median(nFeature_RNA)), 
             aes("", y = Median, group = protocol, label = Median)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, size = 14, colour = "black"), 
        axis.text.y = element_text(size = 14, colour = "black"), 
        strip.text = element_text(size = 18, face = "bold", colour = "black")) +
  labs(y = "", x = "") +
  facet_grid(~protocol, scales = "free") +
  guides(fill = "none") +
  scale_x_discrete(labels = c("", "Rep1", "Rep2"))
## Warning: Ignoring unknown aesthetics: x
p

jpeg(filename = "plots/prostatecancer_unique_genes_violin.jpg", width = 2000, height = 900, res = 300)
print(p)
dev.off()

Compute gene attributes

gene_attr <- do.call(cbind, lapply(c("RRST", "standard"), function(pro) {
  umis <- GetAssayData(PC, slot = "counts", assay = "RNA")[, PC$protocol %in% pro]
  gene_attr_protocol <- data.frame(rowSums(umis), rowMeans(umis > 0), row.names = rownames(umis))
  gene_attr_protocol <- setNames(gene_attr_protocol, nm = c(paste0(gsub(pattern = " ", replacement = "_", x = pro), "_count"), 
                                                            paste0(gsub(pattern = " ", replacement = "_", x = pro), "_det_rate")))
  return(gene_attr_protocol)
}))

gene_attr <- gene_attr %>%
  mutate("RRST_log_count" = log1p(`RRST_count`), "standard_log_count" = log1p(`standard_count`))

Subset genes to include those available in the Visium FFPE probe set.

human_probes <- read.csv("../../sheets/Visium_Human_Transcriptome_Probe_Set_v1.0_GRCh38-2020-A.csv", skip = 5, header = TRUE)
human_probes$gene_name <- do.call(rbind, strsplit(human_probes$probe_id, "\\|"))[, 2]

d - Gene-gene scatter plot of detection rate and UMI counts [prostate cancer]


p1 <- ggplot(gene_attr %>% mutate(gene = rownames(.)) %>% subset(gene %in% human_probes$gene_name), aes_string("standard_log_count", "RRST_log_count")) +
  geom_point(size = 0.3) +
  ggpubr::stat_cor() +
  scale_x_continuous(limits = c(0, max(max(gene_attr$`RRST_log_count`), max(gene_attr$standard_log_count)))) +
  scale_y_continuous(limits = c(0, max(max(gene_attr$`RRST_log_count`), max(gene_attr$standard_log_count)))) +
  geom_abline(slope = 1, intercept = 0, color = "red", linetype = "longdash") +
  labs(x = "standard protocol", y = "RRST", title = "log1p(UMI counts)") +
  theme_minimal() +
  theme(axis.text = element_text(size = 11, colour = "black"), axis.title = element_text(size = 14, colour = "black"),
        plot.title = element_text(size = 14, face = "bold", colour = "black"))

p2 <- ggplot(gene_attr, aes_string("standard_det_rate", "RRST_det_rate")) +
  geom_point(size = 0.3) +
  geom_abline(slope = 1, intercept = 0, color = "red", linetype = "longdash") +
  labs(x = "standard protocol", y = "RRST", title = "detection rate") +
  theme_minimal() +
  theme(axis.text = element_text(size = 11, colour = "black"), axis.title = element_text(size = 14, colour = "black"),
        plot.title = element_text(size = 14, face = "bold", colour = "black"))

p1 + p2

jpeg(filename = "plots/prostatecancer_gene_scatter_counts.jpg", width = 1000, height = 1000, res = 300)
print(p1)
dev.off()
jpeg(filename = "plots/prostatecancer_gene_scatter_det_rate.jpg", width = 1000, height = 1000, res = 300)
print(p2)
dev.off()

Supplementary figure - biotype content prostate cancer


gene_annotations <- read.table(file = "../../genes/hgenes.tsv", header = TRUE)
rownames(gene_annotations) <- gene_annotations$gene_name

# rename gene type for mitochondrial and ribosomal protein coding genes
rp.genes <- grep(pattern = "^RPL|^RPS", x = rownames(gene_attr), value = TRUE)
mt.genes <- grep(pattern = "^MT-", x = rownames(gene_attr), value = TRUE)
gene_annotations$gene_type <- ifelse(gene_annotations$gene_name %in% rp.genes, "ribosomal\nprotein_coding", gene_annotations$gene_type)
gene_annotations$gene_type <- ifelse(gene_annotations$gene_name %in% mt.genes, "mitochondrial", gene_annotations$gene_type)

gene_attr$gene_type <- gene_annotations[rownames(gene_attr), "gene_type"]
gene_attr <- gene_attr %>%
  mutate(observed_in = case_when(RRST_count > 0 & standard_count > 0 ~ "both",
                                 RRST_count > 0 & standard_count == 0 ~ "RNA_rescue",
                                 RRST_count == 0 & standard_count > 0 ~ "standard")) %>%
  mutate(gene = rownames(.))

gene_attr <- subset(gene_attr, !gene_type %in% c("IG_C_pseudogene", "TR_V_pseudogene"))

lvls <- gene_attr %>% 
           group_by(gene_type) %>% 
           summarize(tot = sum(RRST_count + standard_count)) %>% 
           arrange(-tot) %>% 
           pull(gene_type)

gene_attr_summarized <- gene_attr %>% 
               reshape2::melt(measure.vars = c("RRST_count", "standard_count")) %>%
               filter(value > 0) %>%
               group_by(gene_type, variable) %>% 
               summarize(totTranscript = n(), totCount = sum(value)) %>%
               mutate(gene_type = factor(gene_type, levels = lvls), 
                      variable = ifelse(variable == "RRST_count", "RRST", "standard"))
## `summarise()` has grouped output by 'gene_type'. You can override using the
## `.groups` argument.
gene_attr_prop <- gene_attr %>% 
  group_by(gene_type, observed_in) %>%
  summarize(Freq = n()) %>%
  group_by(gene_type) %>%
  mutate(prop = Freq/sum(Freq)) %>%
  mutate(gene_type = factor(gene_type, levels = lvls))
## `summarise()` has grouped output by 'gene_type'. You can override using the
## `.groups` argument.
p1 <- ggplot(gene_attr_summarized,
             aes(gene_type, totTranscript, fill = variable)) +
  geom_bar(stat = "identity", position = position_dodge(preserve = 'single')) +
  scale_y_cut(breaks = 100, which = c(1, 2), scales = c(1, 2), space = 0.5) +
  theme_minimal() +
  theme(axis.text.x = element_blank(), axis.title.x = element_blank(), 
        axis.text.y = element_text(size = 12, colour = "black"), 
        axis.title = element_text(size = 14, colour = "black"), 
        legend.text = element_text(size = 14, colour = "black"),
        legend.title = element_text(size = 14, colour = "black")) +
  labs(y = "transcripts detected", fill = "")
p2 <- ggplot(gene_attr_prop, aes("", prop, fill = observed_in)) +
  geom_bar(stat = "identity", color = "black") + 
  coord_polar("y", start = 0) +
  scale_fill_brewer(palette = "Spectral") +
  facet_wrap(~gene_type, strip.position = "bottom", ncol = 10) +
  theme_void() +
  theme(strip.text = element_text(angle = 90, vjust = 0.5, hjust = 1, margin = margin(0.2, 0, 0.2, 0, "cm"), colour = "black", size = 12), 
        legend.text = element_text(size = 14, colour = "black"),
        legend.title = element_text(size = 14, colour = "black"))
p3 <- ggplot(gene_attr_summarized,
               aes(gene_type, totCount, fill = variable)) +
  geom_bar(stat = "identity", position = position_dodge(preserve = 'single')) +
  scale_y_cut(breaks = 1e5, which = c(1, 2), scales = c(1, 2), space = 0.5) +
  theme_minimal() +
  theme(axis.text.x = element_blank(), axis.title.x = element_blank(), 
        axis.text.y = element_text(size = 12, colour = "black"), 
        axis.title = element_text(size = 14, colour = "black"), 
        legend.text = element_text(size = 14, colour = "black"),
        legend.title = element_text(size = 14, colour = "black")) +
  labs(y = "UMI count", fill = "")

p1

p2

p3

jpeg("../Suppl_figures/Suppl_Figure_mousebrain_and_prostatecancer/prostatecancer_biotype_content1.jpg", width = 2500, height = 500, res = 200, bg = NA)
print(p1)
dev.off()
jpeg("../Suppl_figures/Suppl_Figure_mousebrain_and_prostatecancer//prostatecancer_biotype_content2.jpg", width = 2500, height = 500, res = 200, bg = NA)
print(p2)
dev.off()
jpeg("../Suppl_figures/Suppl_Figure_mousebrain_and_prostatecancer//prostatecancer_biotype_content3.jpg", width = 2500, height = 500, res = 200, bg = NA)
print(p3)
dev.off()

date


date()
## [1] "Thu Aug 25 09:43:29 2022"

Session


sessionInfo()
## R version 4.1.3 (2022-03-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur/Monterey 10.16
## 
## Matrix products: default
## BLAS/LAPACK: /Users/ludviglarsson/anaconda3/envs/R4.1/lib/libopenblasp-r0.3.20.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] ggbreak_0.0.9      magrittr_2.0.3     dplyr_1.0.8        ggrastr_1.0.1     
## [5] ggpubr_0.4.0       STutility_0.1.0    ggplot2_3.3.5      SeuratObject_4.0.4
## [9] Seurat_4.1.0      
## 
## loaded via a namespace (and not attached):
##   [1] backports_1.4.1       plyr_1.8.7            igraph_1.3.0         
##   [4] lazyeval_0.2.2        sp_1.4-6              splines_4.1.3        
##   [7] listenv_0.8.0         scattermore_0.8       digest_0.6.29        
##  [10] yulab.utils_0.0.4     htmltools_0.5.2       viridis_0.6.2        
##  [13] magick_2.7.3          tiff_0.1-11           fansi_1.0.3          
##  [16] tensor_1.5            cluster_2.1.3         ROCR_1.0-11          
##  [19] openxlsx_4.2.5        globals_0.14.0        matrixStats_0.62.0   
##  [22] spatstat.sparse_2.1-1 jpeg_0.1-9            colorspace_2.0-3     
##  [25] ggrepel_0.9.1         xfun_0.30             crayon_1.5.1         
##  [28] jsonlite_1.8.0        spatstat.data_2.2-0   zeallot_0.1.0        
##  [31] survival_3.3-1        zoo_1.8-10            glue_1.6.2           
##  [34] polyclip_1.10-0       gtable_0.3.0          leiden_0.3.9         
##  [37] car_3.0-13            future.apply_1.8.1    abind_1.4-5          
##  [40] scales_1.2.0          DBI_1.1.2             rstatix_0.7.0        
##  [43] spatstat.random_2.2-0 miniUI_0.1.1.1        Rcpp_1.0.8.3         
##  [46] viridisLite_0.4.0     xtable_1.8-4          gridGraphics_0.5-1   
##  [49] reticulate_1.24       spatstat.core_2.4-2   bit_4.0.4            
##  [52] htmlwidgets_1.5.4     httr_1.4.2            RColorBrewer_1.1-3   
##  [55] ellipsis_0.3.2        ica_1.0-2             farver_2.1.0         
##  [58] pkgconfig_2.0.3       sass_0.4.1            uwot_0.1.11          
##  [61] deldir_1.0-6          utf8_1.2.2            labeling_0.4.2       
##  [64] ggplotify_0.1.0       tidyselect_1.1.2      rlang_1.0.2          
##  [67] reshape2_1.4.4        later_1.3.0           munsell_0.5.0        
##  [70] tools_4.1.3           cli_3.2.0             generics_0.1.2       
##  [73] broom_0.8.0           ggridges_0.5.3        evaluate_0.15        
##  [76] stringr_1.4.0         fastmap_1.1.0         yaml_2.3.5           
##  [79] goftest_1.2-3         bit64_4.0.5           knitr_1.38           
##  [82] fitdistrplus_1.1-8    zip_2.2.0             purrr_0.3.4          
##  [85] RANN_2.6.1            readbitmap_0.1.5      pbapply_1.5-0        
##  [88] future_1.24.0         nlme_3.1-157          mime_0.12            
##  [91] aplot_0.1.4           hdf5r_1.3.5           compiler_4.1.3       
##  [94] rstudioapi_0.13       beeswarm_0.4.0        plotly_4.10.0        
##  [97] png_0.1-7             ggsignif_0.6.3        spatstat.utils_2.3-0 
## [100] tibble_3.1.6          bslib_0.3.1           stringi_1.7.6        
## [103] highr_0.9             lattice_0.20-45       Matrix_1.4-1         
## [106] shinyjs_2.1.0         vctrs_0.4.1           pillar_1.7.0         
## [109] lifecycle_1.0.1       spatstat.geom_2.4-0   lmtest_0.9-40        
## [112] jquerylib_0.1.4       RcppAnnoy_0.0.19      data.table_1.14.2    
## [115] cowplot_1.1.1         irlba_2.3.5           raster_3.5-15        
## [118] httpuv_1.6.5          patchwork_1.1.1       R6_2.5.1             
## [121] imager_0.42.13        promises_1.2.0.1      KernSmooth_2.23-20   
## [124] gridExtra_2.3         bmp_0.3               vipor_0.4.5          
## [127] parallelly_1.31.0     codetools_0.2-18      MASS_7.3-56          
## [130] assertthat_0.2.1      withr_2.5.0           sctransform_0.3.3    
## [133] mgcv_1.8-40           parallel_4.1.3        terra_1.5-21         
## [136] ggfun_0.0.6           grid_4.1.3            rpart_4.1.16         
## [139] tidyr_1.2.0           rmarkdown_2.13        carData_3.0-5        
## [142] Rtsne_0.16            shiny_1.7.1           ggbeeswarm_0.6.0